This Rmarkdown document contains the code to generate the community-level analyses of the EHI DNA extraction method test.
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(ape))
suppressPackageStartupMessages(library(distillR))
suppressPackageStartupMessages(library(phyloseq))
suppressPackageStartupMessages(library(hilldiv2))
suppressPackageStartupMessages(library(ggh4x))
suppressPackageStartupMessages(library(tinytable))
suppressPackageStartupMessages(library(vegan))
species="Calotriton asper"
genus=strsplit(species, " ")[[1]][1] %>% tolower()
sample_metadata <- read_tsv(paste0("data/metadata.tsv")) %>%
rename(dataset=Dataset) %>%
filter(Species == species)
read_counts <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0121/DMB0121_counts.tsv.gz") %>%
rename(genome = 1)
genome_metadata <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0121/DMB0121_mag_info.tsv.gz") %>%
rename(genome = 1, length=mag_size)
genome_coverage <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0121/DMB0121_coverage.tsv.gz") %>%
rename(genome = 1)
download.file("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0121/DMB0121.tree.gz", "data/DMB0121.tree.gz")
genome_tree <- read_tree("data/DMB0121.tree.gz")
#Filter by coverage
min_coverage=0.3
read_counts_filt <- genome_coverage %>%
mutate(across(where(is.numeric), ~ ifelse(. > min_coverage, 1, 0))) %>%
mutate(across(-1, ~ . * read_counts[[cur_column()]]))
# Transform to genome counts (non-filtered)
readlength=150
genome_counts <- read_counts %>%
mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))
# Transform to genome counts (coverage-filtered)
readlength=150
genome_counts_filt <- read_counts_filt %>%
mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))
# Retrieve EHI taxonomy colors
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
select(phylum, colors) %>%
unique() %>%
arrange(phylum) %>%
select(colors) %>%
pull()
# Stacked barplot
genome_counts %>%
mutate_at(vars(-genome), funs(./sum(., na.rm = TRUE))) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "dataset", values_to = "count") %>%
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append taxonomy
left_join(., sample_metadata, by = join_by(dataset == dataset)) %>% #append taxonomy
ggplot(., aes(x=dataset,y=count,fill=phylum, group=phylum))+ #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat="identity", colour="white", linewidth=0.1)+ #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
labs(y = "Relative abundance") +
facet_nested(. ~ Sample + Extraction, scales="free_x") +
guides(fill = guide_legend(ncol = 3)) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title.x = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
legend.position="none",
legend.title=element_blank())
#Calculate Hill numbers
richness <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hilldiv(.,q=0) %>%
t() %>%
as.data.frame() %>%
rename(richness=1) %>%
rownames_to_column(var="dataset")
neutral <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hilldiv(.,q=1) %>%
t() %>%
as.data.frame() %>%
rename(neutral=1) %>%
rownames_to_column(var="dataset")
phylogenetic <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hilldiv(.,q=1,tree=genome_tree) %>%
t() %>%
as.data.frame() %>%
rename(phylogenetic=1) %>%
rownames_to_column(var="dataset")
# Merge alpha diversities
alpha_diversity <- richness %>%
full_join(neutral,by=join_by(dataset==dataset)) %>%
full_join(phylogenetic,by=join_by(dataset==dataset))
# Write alpha diversities
alpha_diversity %>% write_tsv(paste0("results/alpha_",genus,".tsv"))
# Print alpha diversity
alpha_diversity %>%
left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(dataset==dataset)) %>%
group_by(Extraction) %>%
summarise(richness=mean(richness), neutral=mean(neutral), phylogenetic=mean(phylogenetic)) %>%
tt()
| Extraction | richness | neutral | phylogenetic |
|---|---|---|---|
| E1 | 107.0 | 51.75880 | 6.406283 |
| E2 | 97.5 | 48.75158 | 6.219160 |
| E3 | 106.5 | 51.56757 | 6.311931 |
#Calculate Hill numbers
richness <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=0, metric="C", out="pair")
neutral <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=1, metric="C", out="pair")
phylogenetic <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=1, tree=genome_tree, metric="C", out="pair")
# Merge beta diversities
beta_diversity <- richness %>%
full_join(neutral,by=c("first", "second")) %>%
full_join(phylogenetic,by=c("first", "second")) %>%
rename(richness=C.x, neutral=C.y, phylogenetic=C)
# Write beta diversities
beta_diversity %>% write_tsv(paste0("results/beta_",genus,".tsv"))
# Print beta diversities
beta_diversity %>%
left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(first==dataset)) %>%
left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(second==dataset)) %>%
mutate(relationship = case_when(
Extraction.x == Extraction.y & Sample.x != Sample.y ~ "inter",
Extraction.x != Extraction.y & Sample.x == Sample.y ~ "intra",
TRUE ~ NA_character_ # Handle other cases if needed
)) %>%
filter(relationship != is.na(relationship)) %>%
group_by(relationship) %>%
summarise(richness=mean(richness), neutral=mean(neutral), phylogenetic=mean(phylogenetic)) %>%
tt()
| relationship | richness | neutral | phylogenetic |
|---|---|---|---|
| inter | 0.29497046 | 0.381599435 | 0.105560612 |
| intra | 0.03215577 | 0.007477988 | 0.001740429 |
richness <-genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=0, metric="C") %>%
as.dist() %>%
adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
broom::tidy() %>%
filter(term == "Extraction") %>%
select(R2) %>% pull()
neutral <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=1, metric="C") %>%
as.dist() %>%
adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
broom::tidy() %>%
filter(term == "Extraction") %>%
select(R2) %>% pull()
phylogenetic <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=1, tree=genome_tree, metric="C") %>%
as.dist() %>%
adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
broom::tidy() %>%
filter(term == "Extraction") %>%
select(R2) %>% pull()
tibble(species=rep(species,3),metric=c("richness","neutral","phylogenetic"),r2=c(richness,neutral,phylogenetic)) %>%
write_tsv(paste0("results/var_",genus,".tsv"))
species="Lissotriton helveticus"
genus=strsplit(species, " ")[[1]][1] %>% tolower()
sample_metadata <- read_tsv(paste0("data/metadata.tsv")) %>%
rename(dataset=Dataset) %>%
filter(Species == species)
read_counts <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0124/DMB0124_counts.tsv.gz") %>%
rename(genome = 1)
genome_metadata <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0124/DMB0124_mag_info.tsv.gz") %>%
rename(genome = 1, length=mag_size)
genome_coverage <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0124/DMB0124_coverage.tsv.gz") %>%
rename(genome = 1)
download.file("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0124/DMB0124.tree.gz", "data/DMB0124.tree.gz")
genome_tree <- read_tree("data/DMB0124.tree.gz")
#Filter by coverage
min_coverage=0.3
read_counts_filt <- genome_coverage %>%
mutate(across(where(is.numeric), ~ ifelse(. > min_coverage, 1, 0))) %>%
mutate(across(-1, ~ . * read_counts[[cur_column()]]))
# Transform to genome counts (non-filtered)
readlength=150
genome_counts <- read_counts %>%
mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))
# Transform to genome counts (coverage-filtered)
readlength=150
genome_counts_filt <- read_counts_filt %>%
mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))
# Retrieve EHI taxonomy colors
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
select(phylum, colors) %>%
unique() %>%
arrange(phylum) %>%
select(colors) %>%
pull()
# Stacked barplot
genome_counts %>%
mutate_at(vars(-genome), funs(./sum(., na.rm = TRUE))) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "dataset", values_to = "count") %>%
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append taxonomy
left_join(., sample_metadata, by = join_by(dataset == dataset)) %>% #append taxonomy
ggplot(., aes(x=dataset,y=count,fill=phylum, group=phylum))+ #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat="identity", colour="white", linewidth=0.1)+ #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
labs(y = "Relative abundance") +
facet_nested(. ~ Sample + Extraction, scales="free_x") +
guides(fill = guide_legend(ncol = 3)) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title.x = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
legend.position="none",
legend.title=element_blank())
#Calculate Hill numbers
richness <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hilldiv(.,q=0) %>%
t() %>%
as.data.frame() %>%
rename(richness=1) %>%
rownames_to_column(var="dataset")
neutral <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hilldiv(.,q=1) %>%
t() %>%
as.data.frame() %>%
rename(neutral=1) %>%
rownames_to_column(var="dataset")
phylogenetic <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hilldiv(.,q=1,tree=genome_tree) %>%
t() %>%
as.data.frame() %>%
rename(phylogenetic=1) %>%
rownames_to_column(var="dataset")
# Merge alpha diversities
alpha_diversity <- richness %>%
full_join(neutral,by=join_by(dataset==dataset)) %>%
full_join(phylogenetic,by=join_by(dataset==dataset))
# Write alpha diversities
alpha_diversity %>% write_tsv(paste0("results/alpha_",genus,".tsv"))
# Print alpha diversity
alpha_diversity %>%
left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(dataset==dataset)) %>%
group_by(Extraction) %>%
summarise(richness=mean(richness), neutral=mean(neutral), phylogenetic=mean(phylogenetic)) %>%
tt()
| Extraction | richness | neutral | phylogenetic |
|---|---|---|---|
| E1 | 22 | 12.05083 | 4.439034 |
| E2 | 21 | 10.73360 | 3.336904 |
| E3 | 22 | 12.15999 | 4.408855 |
#Calculate Hill numbers
richness <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=0, metric="C", out="pair")
neutral <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=1, metric="C", out="pair")
phylogenetic <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=1, tree=genome_tree, metric="C", out="pair")
# Merge beta diversities
beta_diversity <- richness %>%
full_join(neutral,by=c("first", "second")) %>%
full_join(phylogenetic,by=c("first", "second")) %>%
rename(richness=C.x, neutral=C.y, phylogenetic=C)
# Write beta diversities
beta_diversity %>% write_tsv(paste0("results/beta_",genus,".tsv"))
# Print beta diversities
beta_diversity %>%
left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(first==dataset)) %>%
left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(second==dataset)) %>%
mutate(relationship = case_when(
Extraction.x == Extraction.y & Sample.x != Sample.y ~ "inter",
Extraction.x != Extraction.y & Sample.x == Sample.y ~ "intra",
TRUE ~ NA_character_ # Handle other cases if needed
)) %>%
filter(relationship != is.na(relationship)) %>%
group_by(relationship) %>%
summarise(richness=mean(richness), neutral=mean(neutral), phylogenetic=mean(phylogenetic)) %>%
tt()
| relationship | richness | neutral | phylogenetic |
|---|---|---|---|
| inter | 0.9090909 | 0.9487207 | 0.3681096 |
| intra | 0.2288066 | 0.1798833 | 0.1289869 |
richness <-genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=0, metric="C") %>%
as.dist() %>%
adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
broom::tidy() %>%
filter(term == "Extraction") %>%
select(R2) %>% pull()
neutral <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=1, metric="C") %>%
as.dist() %>%
adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
broom::tidy() %>%
filter(term == "Extraction") %>%
select(R2) %>% pull()
phylogenetic <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=1, tree=genome_tree, metric="C") %>%
as.dist() %>%
adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
broom::tidy() %>%
filter(term == "Extraction") %>%
select(R2) %>% pull()
tibble(species=rep(species,3),metric=c("richness","neutral","phylogenetic"),r2=c(richness,neutral,phylogenetic)) %>%
write_tsv(paste0("results/var_",genus,".tsv"))
species="Salamandra atra"
genus=strsplit(species, " ")[[1]][1] %>% tolower()
sample_metadata <- read_tsv(paste0("data/metadata.tsv")) %>%
rename(dataset=Dataset) %>%
filter(Species == species)
read_counts <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0129/DMB0129_counts.tsv.gz") %>%
rename(genome = 1)
genome_metadata <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0129/DMB0129_mag_info.tsv.gz") %>%
rename(genome = 1, length=mag_size)
genome_coverage <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0129/DMB0129_coverage.tsv.gz") %>%
rename(genome = 1)
download.file("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0129/DMB0129.tree.gz", "data/DMB0129.tree.gz")
genome_tree <- read_tree("data/DMB0129.tree.gz")
#Filter by coverage
min_coverage=0.3
read_counts_filt <- genome_coverage %>%
mutate(across(where(is.numeric), ~ ifelse(. > min_coverage, 1, 0))) %>%
mutate(across(-1, ~ . * read_counts[[cur_column()]]))
# Transform to genome counts (non-filtered)
readlength=150
genome_counts <- read_counts %>%
mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))
# Transform to genome counts (coverage-filtered)
readlength=150
genome_counts_filt <- read_counts_filt %>%
mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))
# Retrieve EHI taxonomy colors
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
select(phylum, colors) %>%
unique() %>%
arrange(phylum) %>%
select(colors) %>%
pull()
# Stacked barplot
genome_counts %>%
mutate_at(vars(-genome), funs(./sum(., na.rm = TRUE))) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "dataset", values_to = "count") %>%
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append taxonomy
left_join(., sample_metadata, by = join_by(dataset == dataset)) %>% #append taxonomy
ggplot(., aes(x=dataset,y=count,fill=phylum, group=phylum))+ #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat="identity", colour="white", linewidth=0.1)+ #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
labs(y = "Relative abundance") +
facet_nested(. ~ Sample + Extraction, scales="free_x") +
guides(fill = guide_legend(ncol = 3)) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title.x = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
legend.position="none",
legend.title=element_blank())
#Calculate Hill numbers
richness <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hilldiv(.,q=0) %>%
t() %>%
as.data.frame() %>%
rename(richness=1) %>%
rownames_to_column(var="dataset")
neutral <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hilldiv(.,q=1) %>%
t() %>%
as.data.frame() %>%
rename(neutral=1) %>%
rownames_to_column(var="dataset")
phylogenetic <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hilldiv(.,q=1,tree=genome_tree) %>%
t() %>%
as.data.frame() %>%
rename(phylogenetic=1) %>%
rownames_to_column(var="dataset")
# Merge alpha diversities
alpha_diversity <- richness %>%
full_join(neutral,by=join_by(dataset==dataset)) %>%
full_join(phylogenetic,by=join_by(dataset==dataset))
# Write alpha diversities
alpha_diversity %>% write_tsv(paste0("results/alpha_",genus,".tsv"))
# Print alpha diversity
alpha_diversity %>%
left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(dataset==dataset)) %>%
group_by(Extraction) %>%
summarise(richness=mean(richness), neutral=mean(neutral), phylogenetic=mean(phylogenetic)) %>%
tt()
| Extraction | richness | neutral | phylogenetic |
|---|---|---|---|
| E1 | 70.0 | 35.33416 | 6.024485 |
| E2 | 58.5 | 30.15278 | 5.653610 |
| E3 | 84.0 | 38.29817 | 6.183859 |
#Calculate Hill numbers
richness <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=0, metric="C", out="pair")
neutral <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=1, metric="C", out="pair")
phylogenetic <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=1, tree=genome_tree, metric="C", out="pair")
# Merge beta diversities
beta_diversity <- richness %>%
full_join(neutral,by=c("first", "second")) %>%
full_join(phylogenetic,by=c("first", "second")) %>%
rename(richness=C.x, neutral=C.y, phylogenetic=C)
# Write beta diversities
beta_diversity %>% write_tsv(paste0("results/beta_",genus,".tsv"))
# Print beta diversities
beta_diversity %>%
left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(first==dataset)) %>%
left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(second==dataset)) %>%
mutate(relationship = case_when(
Extraction.x == Extraction.y & Sample.x != Sample.y ~ "inter",
Extraction.x != Extraction.y & Sample.x == Sample.y ~ "intra",
TRUE ~ NA_character_ # Handle other cases if needed
)) %>%
filter(relationship != is.na(relationship)) %>%
group_by(relationship) %>%
summarise(richness=mean(richness), neutral=mean(neutral), phylogenetic=mean(phylogenetic)) %>%
tt()
| relationship | richness | neutral | phylogenetic |
|---|---|---|---|
| inter | 0.3854701 | 0.3862921 | 0.125862853 |
| intra | 0.1422510 | 0.0315443 | 0.007370115 |
richness <-genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=0, metric="C") %>%
as.dist() %>%
adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
broom::tidy() %>%
filter(term == "Extraction") %>%
select(R2) %>% pull()
neutral <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=1, metric="C") %>%
as.dist() %>%
adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
broom::tidy() %>%
filter(term == "Extraction") %>%
select(R2) %>% pull()
phylogenetic <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=1, tree=genome_tree, metric="C") %>%
as.dist() %>%
adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
broom::tidy() %>%
filter(term == "Extraction") %>%
select(R2) %>% pull()
tibble(species=rep(species,3),metric=c("richness","neutral","phylogenetic"),r2=c(richness,neutral,phylogenetic)) %>%
write_tsv(paste0("results/var_",genus,".tsv"))
species="Geospizopsis unicolor"
genus=strsplit(species, " ")[[1]][1] %>% tolower()
sample_metadata <- read_tsv(paste0("data/metadata.tsv")) %>%
rename(dataset=Dataset) %>%
filter(Species == species)
read_counts <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0123/DMB0123_counts.tsv.gz") %>%
rename(genome = 1)
genome_metadata <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0123/DMB0123_mag_info.tsv.gz") %>%
rename(genome = 1, length=mag_size)
genome_coverage <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0123/DMB0123_coverage.tsv.gz") %>%
rename(genome = 1)
download.file("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0123/DMB0123.tree.gz", "data/DMB0123.tree.gz")
genome_tree <- read_tree("data/DMB0123.tree.gz")
#Filter by coverage
min_coverage=0.3
read_counts_filt <- genome_coverage %>%
mutate(across(where(is.numeric), ~ ifelse(. > min_coverage, 1, 0))) %>%
mutate(across(-1, ~ . * read_counts[[cur_column()]]))
# Transform to genome counts (non-filtered)
readlength=150
genome_counts <- read_counts %>%
mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))
# Transform to genome counts (coverage-filtered)
readlength=150
genome_counts_filt <- read_counts_filt %>%
mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))
# Retrieve EHI taxonomy colors
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
select(phylum, colors) %>%
unique() %>%
arrange(phylum) %>%
select(colors) %>%
pull()
# Stacked barplot
genome_counts %>%
mutate_at(vars(-genome), funs(./sum(., na.rm = TRUE))) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "dataset", values_to = "count") %>%
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append taxonomy
left_join(., sample_metadata, by = join_by(dataset == dataset)) %>% #append taxonomy
ggplot(., aes(x=dataset,y=count,fill=phylum, group=phylum))+ #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat="identity", colour="white", linewidth=0.1)+ #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
labs(y = "Relative abundance") +
facet_nested(. ~ Sample + Extraction, scales="free_x") +
guides(fill = guide_legend(ncol = 3)) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title.x = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
legend.position="none",
legend.title=element_blank())
Alpha diversity is not calculated as only one sample contains enough genome coverage to calculate diversity estimates.
Beta diversity is not calculated as only one sample contains enough genome coverage to calculate diversity estimates.
species="Perisoreus infaustus"
genus=strsplit(species, " ")[[1]][1] %>% tolower()
sample_metadata <- read_tsv(paste0("data/metadata.tsv")) %>%
rename(dataset=Dataset) %>%
filter(Species == species)
read_counts <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0126/DMB0126_counts.tsv.gz") %>%
rename(genome = 1)
genome_metadata <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0126/DMB0126_mag_info.tsv.gz") %>%
rename(genome = 1, length=mag_size)
genome_coverage <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0126/DMB0126_coverage.tsv.gz") %>%
rename(genome = 1)
download.file("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0126/DMB0126.tree.gz", "data/DMB0126.tree.gz")
genome_tree <- read_tree("data/DMB0126.tree.gz")
#Filter by coverage
min_coverage=0.3
read_counts_filt <- genome_coverage %>%
mutate(across(where(is.numeric), ~ ifelse(. > min_coverage, 1, 0))) %>%
mutate(across(-1, ~ . * read_counts[[cur_column()]]))
# Transform to genome counts (non-filtered)
readlength=150
genome_counts <- read_counts %>%
mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))
# Transform to genome counts (coverage-filtered)
readlength=150
genome_counts_filt <- read_counts_filt %>%
mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))
# Retrieve EHI taxonomy colors
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
select(phylum, colors) %>%
unique() %>%
arrange(phylum) %>%
select(colors) %>%
pull()
# Stacked barplot
genome_counts %>%
mutate_at(vars(-genome), funs(./sum(., na.rm = TRUE))) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "dataset", values_to = "count") %>%
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append taxonomy
left_join(., sample_metadata, by = join_by(dataset == dataset)) %>% #append taxonomy
ggplot(., aes(x=dataset,y=count,fill=phylum, group=phylum))+ #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat="identity", colour="white", linewidth=0.1)+ #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
labs(y = "Relative abundance") +
facet_nested(. ~ Sample + Extraction, scales="free_x") +
guides(fill = guide_legend(ncol = 3)) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title.x = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
legend.position="none",
legend.title=element_blank())
Alpha diversity is not calculated as only one sample contains enough genome coverage to calculate diversity estimates.
Beta diversity is not calculated as only one sample contains enough genome coverage to calculate diversity estimates.
The number of reconstructed genomes was too low for community analyses.
species="Plecotus auritus"
genus=strsplit(species, " ")[[1]][1] %>% tolower()
sample_metadata <- read_tsv(paste0("data/metadata.tsv")) %>%
rename(dataset=Dataset) %>%
filter(Species == species)
read_counts <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0127/DMB0127_counts.tsv.gz") %>%
rename(genome = 1)
genome_metadata <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0127/DMB0127_mag_info.tsv.gz") %>%
rename(genome = 1, length=mag_size)
genome_coverage <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0127/DMB0127_coverage.tsv.gz") %>%
rename(genome = 1)
download.file("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0127/DMB0127.tree.gz", "data/DMB0127.tree.gz")
genome_tree <- read_tree("data/DMB0127.tree.gz")
#Filter by coverage
min_coverage=0.3
read_counts_filt <- genome_coverage %>%
mutate(across(where(is.numeric), ~ ifelse(. > min_coverage, 1, 0))) %>%
mutate(across(-1, ~ . * read_counts[[cur_column()]]))
# Transform to genome counts (non-filtered)
readlength=150
genome_counts <- read_counts %>%
mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))
# Transform to genome counts (coverage-filtered)
readlength=150
genome_counts_filt <- read_counts_filt %>%
mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))
# Retrieve EHI taxonomy colors
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
select(phylum, colors) %>%
unique() %>%
arrange(phylum) %>%
select(colors) %>%
pull()
# Stacked barplot
genome_counts %>%
mutate_at(vars(-genome), funs(./sum(., na.rm = TRUE))) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "dataset", values_to = "count") %>%
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append taxonomy
left_join(., sample_metadata, by = join_by(dataset == dataset)) %>% #append taxonomy
ggplot(., aes(x=dataset,y=count,fill=phylum, group=phylum))+ #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat="identity", colour="white", linewidth=0.1)+ #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
labs(y = "Relative abundance") +
facet_nested(. ~ Sample + Extraction, scales="free_x") +
guides(fill = guide_legend(ncol = 3)) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title.x = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
legend.position="none",
legend.title=element_blank())
#Calculate Hill numbers
richness <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hilldiv(.,q=0) %>%
t() %>%
as.data.frame() %>%
rename(richness=1) %>%
rownames_to_column(var="dataset")
neutral <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hilldiv(.,q=1) %>%
t() %>%
as.data.frame() %>%
rename(neutral=1) %>%
rownames_to_column(var="dataset")
phylogenetic <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hilldiv(.,q=1,tree=genome_tree) %>%
t() %>%
as.data.frame() %>%
rename(phylogenetic=1) %>%
rownames_to_column(var="dataset")
# Merge alpha diversities
alpha_diversity <- richness %>%
full_join(neutral,by=join_by(dataset==dataset)) %>%
full_join(phylogenetic,by=join_by(dataset==dataset))
# Write alpha diversities
alpha_diversity %>% write_tsv(paste0("results/alpha_",genus,".tsv"))
# Print alpha diversity
alpha_diversity %>%
left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(dataset==dataset)) %>%
group_by(Extraction) %>%
summarise(richness=mean(richness), neutral=mean(neutral), phylogenetic=mean(phylogenetic)) %>%
tt()
| Extraction | richness | neutral | phylogenetic |
|---|---|---|---|
| E1 | 12.5 | 5.070885 | 2.691939 |
| E2 | 6.5 | 4.042292 | 2.282549 |
| E3 | 11.5 | 5.348040 | 2.739107 |
#Calculate Hill numbers
richness <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=0, metric="C", out="pair")
neutral <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=1, metric="C", out="pair")
phylogenetic <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=1, tree=genome_tree, metric="C", out="pair")
# Merge beta diversities
beta_diversity <- richness %>%
full_join(neutral,by=c("first", "second")) %>%
full_join(phylogenetic,by=c("first", "second")) %>%
rename(richness=C.x, neutral=C.y, phylogenetic=C)
# Write beta diversities
beta_diversity %>% write_tsv(paste0("results/beta_",genus,".tsv"))
# Print beta diversities
beta_diversity %>%
left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(first==dataset)) %>%
left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(second==dataset)) %>%
mutate(relationship = case_when(
Extraction.x == Extraction.y & Sample.x != Sample.y ~ "inter",
Extraction.x != Extraction.y & Sample.x == Sample.y ~ "intra",
TRUE ~ NA_character_ # Handle other cases if needed
)) %>%
filter(relationship != is.na(relationship)) %>%
group_by(relationship) %>%
summarise(richness=mean(richness), neutral=mean(neutral), phylogenetic=mean(phylogenetic)) %>%
tt()
| relationship | richness | neutral | phylogenetic |
|---|---|---|---|
| inter | 0.6727759 | 0.82731396 | 0.68954392 |
| intra | 0.2123748 | 0.03113448 | 0.01336019 |
richness <-genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=0, metric="C") %>%
as.dist() %>%
adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
broom::tidy() %>%
filter(term == "Extraction") %>%
select(R2) %>% pull()
neutral <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=1, metric="C") %>%
as.dist() %>%
adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
broom::tidy() %>%
filter(term == "Extraction") %>%
select(R2) %>% pull()
phylogenetic <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=1, tree=genome_tree, metric="C") %>%
as.dist() %>%
adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
broom::tidy() %>%
filter(term == "Extraction") %>%
select(R2) %>% pull()
tibble(species=rep(species,3),metric=c("richness","neutral","phylogenetic"),r2=c(richness,neutral,phylogenetic)) %>%
write_tsv(paste0("results/var_",genus,".tsv"))
species="Sciurus carolinensis"
genus=strsplit(species, " ")[[1]][1] %>% tolower()
sample_metadata <- read_tsv(paste0("data/metadata.tsv")) %>%
rename(dataset=Dataset) %>%
filter(Species == species)
read_counts <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0130/DMB0130_counts.tsv.gz") %>%
rename(genome = 1)
genome_metadata <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0130/DMB0130_mag_info.tsv.gz") %>%
rename(genome = 1, length=mag_size)
genome_coverage <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0130/DMB0130_coverage.tsv.gz") %>%
rename(genome = 1)
download.file("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0130/DMB0130.tree.gz", "data/DMB0130.tree.gz")
genome_tree <- read_tree("data/DMB0130.tree.gz")
#Filter by coverage
min_coverage=0.3
read_counts_filt <- genome_coverage %>%
mutate(across(where(is.numeric), ~ ifelse(. > min_coverage, 1, 0))) %>%
mutate(across(-1, ~ . * read_counts[[cur_column()]]))
# Transform to genome counts (non-filtered)
readlength=150
genome_counts <- read_counts %>%
mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))
# Transform to genome counts (coverage-filtered)
readlength=150
genome_counts_filt <- read_counts_filt %>%
mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))
# Retrieve EHI taxonomy colors
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
select(phylum, colors) %>%
unique() %>%
arrange(phylum) %>%
select(colors) %>%
pull()
# Stacked barplot
genome_counts %>%
mutate_at(vars(-genome), funs(./sum(., na.rm = TRUE))) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "dataset", values_to = "count") %>%
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append taxonomy
left_join(., sample_metadata, by = join_by(dataset == dataset)) %>% #append taxonomy
ggplot(., aes(x=dataset,y=count,fill=phylum, group=phylum))+ #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat="identity", colour="white", linewidth=0.1)+ #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
labs(y = "Relative abundance") +
facet_nested(. ~ Sample + Extraction, scales="free_x") +
guides(fill = guide_legend(ncol = 3)) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title.x = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
legend.position="none",
legend.title=element_blank())
#Calculate Hill numbers
richness <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hilldiv(.,q=0) %>%
t() %>%
as.data.frame() %>%
rename(richness=1) %>%
rownames_to_column(var="dataset")
neutral <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hilldiv(.,q=1) %>%
t() %>%
as.data.frame() %>%
rename(neutral=1) %>%
rownames_to_column(var="dataset")
phylogenetic <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hilldiv(.,q=1,tree=genome_tree) %>%
t() %>%
as.data.frame() %>%
rename(phylogenetic=1) %>%
rownames_to_column(var="dataset")
# Merge alpha diversities
alpha_diversity <- richness %>%
full_join(neutral,by=join_by(dataset==dataset)) %>%
full_join(phylogenetic,by=join_by(dataset==dataset))
# Write alpha diversities
alpha_diversity %>% write_tsv(paste0("results/alpha_",genus,".tsv"))
# Print alpha diversity
alpha_diversity %>%
left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(dataset==dataset)) %>%
group_by(Extraction) %>%
summarise(richness=mean(richness), neutral=mean(neutral), phylogenetic=mean(phylogenetic)) %>%
tt()
| Extraction | richness | neutral | phylogenetic |
|---|---|---|---|
| E1 | 95.0 | 39.66320 | 5.557244 |
| E2 | 107.5 | 59.13437 | 6.138243 |
| E3 | 96.0 | 47.74310 | 5.728024 |
#Calculate Hill numbers
richness <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=0, metric="C", out="pair")
neutral <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=1, metric="C", out="pair")
phylogenetic <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=1, tree=genome_tree, metric="C", out="pair")
# Merge beta diversities
beta_diversity <- richness %>%
full_join(neutral,by=c("first", "second")) %>%
full_join(phylogenetic,by=c("first", "second")) %>%
rename(richness=C.x, neutral=C.y, phylogenetic=C)
# Write beta diversities
beta_diversity %>% write_tsv(paste0("results/beta_",genus,".tsv"))
# Print beta diversities
beta_diversity %>%
left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(first==dataset)) %>%
left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(second==dataset)) %>%
mutate(relationship = case_when(
Extraction.x == Extraction.y & Sample.x != Sample.y ~ "inter",
Extraction.x != Extraction.y & Sample.x == Sample.y ~ "intra",
TRUE ~ NA_character_ # Handle other cases if needed
)) %>%
filter(relationship != is.na(relationship)) %>%
group_by(relationship) %>%
summarise(richness=mean(richness), neutral=mean(neutral), phylogenetic=mean(phylogenetic)) %>%
tt()
| relationship | richness | neutral | phylogenetic |
|---|---|---|---|
| inter | 0.4447768 | 0.6554629 | 0.19033972 |
| intra | 0.1823044 | 0.1211974 | 0.03562178 |
richness <-genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=0, metric="C") %>%
as.dist() %>%
adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
broom::tidy() %>%
filter(term == "Extraction") %>%
select(R2) %>% pull()
neutral <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=1, metric="C") %>%
as.dist() %>%
adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
broom::tidy() %>%
filter(term == "Extraction") %>%
select(R2) %>% pull()
phylogenetic <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=1, tree=genome_tree, metric="C") %>%
as.dist() %>%
adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
broom::tidy() %>%
filter(term == "Extraction") %>%
select(R2) %>% pull()
tibble(species=rep(species,3),metric=c("richness","neutral","phylogenetic"),r2=c(richness,neutral,phylogenetic)) %>%
write_tsv(paste0("results/var_",genus,".tsv"))
These data are pending to be produced.
species="Chalcides striatus"
genus=strsplit(species, " ")[[1]][1] %>% tolower()
sample_metadata <- read_tsv(paste0("data/metadata.tsv")) %>%
rename(dataset=Dataset) %>%
filter(Species == species)
read_counts <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0122/DMB0122_counts.tsv.gz") %>%
rename(genome = 1)
genome_metadata <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0122/DMB0122_mag_info.tsv.gz") %>%
rename(genome = 1, length=mag_size)
genome_coverage <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0122/DMB0122_coverage.tsv.gz") %>%
rename(genome = 1)
download.file("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0122/DMB0122.tree.gz", "data/DMB0122.tree.gz")
genome_tree <- read_tree("data/DMB0122.tree.gz")
#Filter by coverage
min_coverage=0.3
read_counts_filt <- genome_coverage %>%
mutate(across(where(is.numeric), ~ ifelse(. > min_coverage, 1, 0))) %>%
mutate(across(-1, ~ . * read_counts[[cur_column()]]))
# Transform to genome counts (non-filtered)
readlength=150
genome_counts <- read_counts %>%
mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))
# Transform to genome counts (coverage-filtered)
readlength=150
genome_counts_filt <- read_counts_filt %>%
mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))
# Retrieve EHI taxonomy colors
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
select(phylum, colors) %>%
unique() %>%
arrange(phylum) %>%
select(colors) %>%
pull()
# Stacked barplot
genome_counts %>%
mutate_at(vars(-genome), funs(./sum(., na.rm = TRUE))) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "dataset", values_to = "count") %>%
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append taxonomy
left_join(., sample_metadata, by = join_by(dataset == dataset)) %>% #append taxonomy
ggplot(., aes(x=dataset,y=count,fill=phylum, group=phylum))+ #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat="identity", colour="white", linewidth=0.1)+ #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
labs(y = "Relative abundance") +
facet_nested(. ~ Sample + Extraction, scales="free_x") +
guides(fill = guide_legend(ncol = 3)) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title.x = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
legend.position="none",
legend.title=element_blank())
#Calculate Hill numbers
richness <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hilldiv(.,q=0) %>%
t() %>%
as.data.frame() %>%
rename(richness=1) %>%
rownames_to_column(var="dataset")
neutral <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hilldiv(.,q=1) %>%
t() %>%
as.data.frame() %>%
rename(neutral=1) %>%
rownames_to_column(var="dataset")
phylogenetic <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hilldiv(.,q=1,tree=genome_tree) %>%
t() %>%
as.data.frame() %>%
rename(phylogenetic=1) %>%
rownames_to_column(var="dataset")
# Merge alpha diversities
alpha_diversity <- richness %>%
full_join(neutral,by=join_by(dataset==dataset)) %>%
full_join(phylogenetic,by=join_by(dataset==dataset))
# Write alpha diversities
alpha_diversity %>% write_tsv(paste0("results/alpha_",genus,".tsv"))
# Print alpha diversity
alpha_diversity %>%
left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(dataset==dataset)) %>%
group_by(Extraction) %>%
summarise(richness=mean(richness), neutral=mean(neutral), phylogenetic=mean(phylogenetic)) %>%
tt()
| Extraction | richness | neutral | phylogenetic |
|---|---|---|---|
| E1 | 60.5 | 31.68716 | 5.314585 |
| E2 | 74.0 | 37.04400 | 5.254353 |
| E3 | 64.5 | 34.15145 | 5.180680 |
#Calculate Hill numbers
richness <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=0, metric="C", out="pair")
neutral <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=1, metric="C", out="pair")
phylogenetic <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=1, tree=genome_tree, metric="C", out="pair")
# Merge beta diversities
beta_diversity <- richness %>%
full_join(neutral,by=c("first", "second")) %>%
full_join(phylogenetic,by=c("first", "second")) %>%
rename(richness=C.x, neutral=C.y, phylogenetic=C)
# Write beta diversities
beta_diversity %>% write_tsv(paste0("results/beta_",genus,".tsv"))
# Print beta diversities
beta_diversity %>%
left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(first==dataset)) %>%
left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(second==dataset)) %>%
mutate(relationship = case_when(
Extraction.x == Extraction.y & Sample.x != Sample.y ~ "inter",
Extraction.x != Extraction.y & Sample.x == Sample.y ~ "intra",
TRUE ~ NA_character_ # Handle other cases if needed
)) %>%
filter(relationship != is.na(relationship)) %>%
group_by(relationship) %>%
summarise(richness=mean(richness), neutral=mean(neutral), phylogenetic=mean(phylogenetic)) %>%
tt()
| relationship | richness | neutral | phylogenetic |
|---|---|---|---|
| inter | 0.4109306 | 0.45373698 | 0.09334837 |
| intra | 0.1286148 | 0.06449944 | 0.02626687 |
richness <-genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=0, metric="C") %>%
as.dist() %>%
adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
broom::tidy() %>%
filter(term == "Extraction") %>%
select(R2) %>% pull()
neutral <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=1, metric="C") %>%
as.dist() %>%
adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
broom::tidy() %>%
filter(term == "Extraction") %>%
select(R2) %>% pull()
phylogenetic <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=1, tree=genome_tree, metric="C") %>%
as.dist() %>%
adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
broom::tidy() %>%
filter(term == "Extraction") %>%
select(R2) %>% pull()
tibble(species=rep(species,3),metric=c("richness","neutral","phylogenetic"),r2=c(richness,neutral,phylogenetic)) %>%
write_tsv(paste0("results/var_",genus,".tsv"))
species="Natrix astreptophora"
genus=strsplit(species, " ")[[1]][1] %>% tolower()
sample_metadata <- read_tsv(paste0("data/metadata.tsv")) %>%
rename(dataset=Dataset) %>%
filter(Species == species)
read_counts <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0125/DMB0125_counts.tsv.gz") %>%
rename(genome = 1)
genome_metadata <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0125/DMB0125_mag_info.tsv.gz") %>%
rename(genome = 1, length=mag_size)
genome_coverage <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0125/DMB0125_coverage.tsv.gz") %>%
rename(genome = 1)
download.file("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0125/DMB0125.tree.gz", "data/DMB0125.tree.gz")
genome_tree <- read_tree("data/DMB0125.tree.gz")
#Filter by coverage
min_coverage=0.3
read_counts_filt <- genome_coverage %>%
mutate(across(where(is.numeric), ~ ifelse(. > min_coverage, 1, 0))) %>%
mutate(across(-1, ~ . * read_counts[[cur_column()]]))
# Transform to genome counts (non-filtered)
readlength=150
genome_counts <- read_counts %>%
mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))
# Transform to genome counts (coverage-filtered)
readlength=150
genome_counts_filt <- read_counts_filt %>%
mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))
# Retrieve EHI taxonomy colors
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
select(phylum, colors) %>%
unique() %>%
arrange(phylum) %>%
select(colors) %>%
pull()
# Stacked barplot
genome_counts %>%
mutate_at(vars(-genome), funs(./sum(., na.rm = TRUE))) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "dataset", values_to = "count") %>%
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append taxonomy
left_join(., sample_metadata, by = join_by(dataset == dataset)) %>% #append taxonomy
ggplot(., aes(x=dataset,y=count,fill=phylum, group=phylum))+ #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat="identity", colour="white", linewidth=0.1)+ #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
labs(y = "Relative abundance") +
facet_nested(. ~ Sample + Extraction, scales="free_x") +
guides(fill = guide_legend(ncol = 3)) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title.x = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
legend.position="none",
legend.title=element_blank())
#Calculate Hill numbers
richness <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hilldiv(.,q=0) %>%
t() %>%
as.data.frame() %>%
rename(richness=1) %>%
rownames_to_column(var="dataset")
neutral <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hilldiv(.,q=1) %>%
t() %>%
as.data.frame() %>%
rename(neutral=1) %>%
rownames_to_column(var="dataset")
phylogenetic <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hilldiv(.,q=1,tree=genome_tree) %>%
t() %>%
as.data.frame() %>%
rename(phylogenetic=1) %>%
rownames_to_column(var="dataset")
# Merge alpha diversities
alpha_diversity <- richness %>%
full_join(neutral,by=join_by(dataset==dataset)) %>%
full_join(phylogenetic,by=join_by(dataset==dataset))
# Write alpha diversities
alpha_diversity %>% write_tsv(paste0("results/alpha_",genus,".tsv"))
# Print alpha diversity
alpha_diversity %>%
left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(dataset==dataset)) %>%
group_by(Extraction) %>%
summarise(richness=mean(richness), neutral=mean(neutral), phylogenetic=mean(phylogenetic)) %>%
tt()
| Extraction | richness | neutral | phylogenetic |
|---|---|---|---|
| E1 | 37.5 | 16.30449 | 3.270384 |
| E2 | 24.0 | 13.43764 | 3.065722 |
| E3 | 29.5 | 15.34095 | 3.211162 |
#Calculate Hill numbers
richness <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=0, metric="C", out="pair")
neutral <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=1, metric="C", out="pair")
phylogenetic <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=1, tree=genome_tree, metric="C", out="pair")
# Merge beta diversities
beta_diversity <- richness %>%
full_join(neutral,by=c("first", "second")) %>%
full_join(phylogenetic,by=c("first", "second")) %>%
rename(richness=C.x, neutral=C.y, phylogenetic=C)
# Write beta diversities
beta_diversity %>% write_tsv(paste0("results/beta_",genus,".tsv"))
# Print beta diversities
beta_diversity %>%
left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(first==dataset)) %>%
left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(second==dataset)) %>%
mutate(relationship = case_when(
Extraction.x == Extraction.y & Sample.x != Sample.y ~ "inter",
Extraction.x != Extraction.y & Sample.x == Sample.y ~ "intra",
TRUE ~ NA_character_ # Handle other cases if needed
)) %>%
filter(relationship != is.na(relationship)) %>%
group_by(relationship) %>%
summarise(richness=mean(richness), neutral=mean(neutral), phylogenetic=mean(phylogenetic)) %>%
tt()
| relationship | richness | neutral | phylogenetic |
|---|---|---|---|
| inter | 0.5386629 | 0.65025916 | 0.18128644 |
| intra | 0.2756370 | 0.04665216 | 0.01166948 |
richness <-genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=0, metric="C") %>%
as.dist() %>%
adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
broom::tidy() %>%
filter(term == "Extraction") %>%
select(R2) %>% pull()
neutral <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=1, metric="C") %>%
as.dist() %>%
adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
broom::tidy() %>%
filter(term == "Extraction") %>%
select(R2) %>% pull()
phylogenetic <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=1, tree=genome_tree, metric="C") %>%
as.dist() %>%
adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
broom::tidy() %>%
filter(term == "Extraction") %>%
select(R2) %>% pull()
tibble(species=rep(species,3),metric=c("richness","neutral","phylogenetic"),r2=c(richness,neutral,phylogenetic)) %>%
write_tsv(paste0("results/var_",genus,".tsv"))
species="Podarcis muralis"
genus=strsplit(species, " ")[[1]][1] %>% tolower()
sample_metadata <- read_tsv(paste0("data/metadata.tsv")) %>%
rename(dataset=Dataset) %>%
filter(Species == species)
read_counts <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0128/DMB0128_counts.tsv.gz") %>%
rename(genome = 1)
genome_metadata <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0128/DMB0128_mag_info.tsv.gz") %>%
rename(genome = 1, length=mag_size)
genome_coverage <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0128/DMB0128_coverage.tsv.gz") %>%
rename(genome = 1)
download.file("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0128/DMB0128.tree.gz", "data/DMB0128.tree.gz")
genome_tree <- read_tree("data/DMB0128.tree.gz")
#Filter by coverage
min_coverage=0.3
read_counts_filt <- genome_coverage %>%
mutate(across(where(is.numeric), ~ ifelse(. > min_coverage, 1, 0))) %>%
mutate(across(-1, ~ . * read_counts[[cur_column()]]))
# Transform to genome counts (non-filtered)
readlength=150
genome_counts <- read_counts %>%
mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))
# Transform to genome counts (coverage-filtered)
readlength=150
genome_counts_filt <- read_counts_filt %>%
mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))
# Retrieve EHI taxonomy colors
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
select(phylum, colors) %>%
unique() %>%
arrange(phylum) %>%
select(colors) %>%
pull()
# Stacked barplot
genome_counts %>%
mutate_at(vars(-genome), funs(./sum(., na.rm = TRUE))) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "dataset", values_to = "count") %>%
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append taxonomy
left_join(., sample_metadata, by = join_by(dataset == dataset)) %>% #append taxonomy
ggplot(., aes(x=dataset,y=count,fill=phylum, group=phylum))+ #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat="identity", colour="white", linewidth=0.1)+ #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
labs(y = "Relative abundance") +
facet_nested(. ~ Sample + Extraction, scales="free_x") +
guides(fill = guide_legend(ncol = 3)) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title.x = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
legend.position="none",
legend.title=element_blank())
#Calculate Hill numbers
richness <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hilldiv(.,q=0) %>%
t() %>%
as.data.frame() %>%
rename(richness=1) %>%
rownames_to_column(var="dataset")
neutral <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hilldiv(.,q=1) %>%
t() %>%
as.data.frame() %>%
rename(neutral=1) %>%
rownames_to_column(var="dataset")
phylogenetic <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hilldiv(.,q=1,tree=genome_tree) %>%
t() %>%
as.data.frame() %>%
rename(phylogenetic=1) %>%
rownames_to_column(var="dataset")
# Merge alpha diversities
alpha_diversity <- richness %>%
full_join(neutral,by=join_by(dataset==dataset)) %>%
full_join(phylogenetic,by=join_by(dataset==dataset))
# Write alpha diversities
alpha_diversity %>% write_tsv(paste0("results/alpha_",genus,".tsv"))
# Print alpha diversity
alpha_diversity %>%
left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(dataset==dataset)) %>%
group_by(Extraction) %>%
summarise(richness=mean(richness), neutral=mean(neutral), phylogenetic=mean(phylogenetic)) %>%
tt()
| Extraction | richness | neutral | phylogenetic |
|---|---|---|---|
| E1 | 80.5 | 33.09492 | 4.689994 |
| E2 | 78.5 | 34.42602 | 4.745518 |
| E3 | 73.0 | 32.71816 | 4.716865 |
#Calculate Hill numbers
richness <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=0, metric="C", out="pair")
neutral <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=1, metric="C", out="pair")
phylogenetic <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=1, tree=genome_tree, metric="C", out="pair")
# Merge beta diversities
beta_diversity <- richness %>%
full_join(neutral,by=c("first", "second")) %>%
full_join(phylogenetic,by=c("first", "second")) %>%
rename(richness=C.x, neutral=C.y, phylogenetic=C)
# Write beta diversities
beta_diversity %>% write_tsv(paste0("results/beta_",genus,".tsv"))
# Print beta diversities
beta_diversity %>%
left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(first==dataset)) %>%
left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(second==dataset)) %>%
mutate(relationship = case_when(
Extraction.x == Extraction.y & Sample.x != Sample.y ~ "inter",
Extraction.x != Extraction.y & Sample.x == Sample.y ~ "intra",
TRUE ~ NA_character_ # Handle other cases if needed
)) %>%
filter(relationship != is.na(relationship)) %>%
group_by(relationship) %>%
summarise(richness=mean(richness), neutral=mean(neutral), phylogenetic=mean(phylogenetic)) %>%
tt()
| relationship | richness | neutral | phylogenetic |
|---|---|---|---|
| inter | 0.2864067 | 0.41508917 | 0.122770382 |
| intra | 0.0446703 | 0.00905735 | 0.003367558 |
richness <-genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=0, metric="C") %>%
as.dist() %>%
adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
broom::tidy() %>%
filter(term == "Extraction") %>%
select(R2) %>% pull()
neutral <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=1, metric="C") %>%
as.dist() %>%
adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
broom::tidy() %>%
filter(term == "Extraction") %>%
select(R2) %>% pull()
phylogenetic <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=1, tree=genome_tree, metric="C") %>%
as.dist() %>%
adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
broom::tidy() %>%
filter(term == "Extraction") %>%
select(R2) %>% pull()
tibble(species=rep(species,3),metric=c("richness","neutral","phylogenetic"),r2=c(richness,neutral,phylogenetic)) %>%
write_tsv(paste0("results/var_",genus,".tsv"))